import json
# Default file locations used when this file is run as a script.
input_path = "mmlu1.jsonl"
business_path = "business.jsonl"
engineering_path = "engineering.jsonl"


def split_by_category(source_path, business_out, engineering_out):
    """Copy "business" and "engineering" records into separate JSONL files.

    Reads ``source_path`` line by line as JSON Lines. Every record whose
    ``"category"`` value equals ``"business"`` or ``"engineering"``
    (compared case-insensitively) is re-serialized and appended to the
    matching output file. Blank lines are ignored. Lines that are not valid
    JSON, or that decode to something other than a JSON object, are reported
    on stdout and skipped. Records whose category is missing, is not a
    string, or names any other category are skipped without a message.

    Both output files are created (or truncated) even when nothing matches.

    Args:
        source_path: Path of the JSONL file to read (UTF-8).
        business_out: Path of the file receiving "business" records.
        engineering_out: Path of the file receiving "engineering" records.

    Returns:
        A dict ``{"business": n, "engineering": m}`` with the number of
        records written to each output file.

    Raises:
        OSError: If any of the three files cannot be opened.
    """
    written = {"business": 0, "engineering": 0}

    with open(source_path, "r", encoding="utf-8") as fin, \
         open(business_out, "w", encoding="utf-8") as fout_business, \
         open(engineering_out, "w", encoding="utf-8") as fout_engineering:
        # Map each accepted (lower-cased) category to its destination file.
        sinks = {
            "business": fout_business,
            "engineering": fout_engineering,
        }

        for line in fin:
            line = line.strip()
            if not line:
                continue

            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                print(f"跳过无法解析的行: {line}")
                continue

            # A line can be valid JSON without being an object (a list, a
            # number, a bare string, ...). Such values have no .get(), so
            # report and skip them instead of crashing the whole run.
            if not isinstance(item, dict):
                print(f"跳过非对象记录: {line}")
                continue

            # The category may be absent, null, or some other non-string
            # value; none of those can match, and calling .lower() on them
            # would raise AttributeError.
            category = item.get("category")
            if not isinstance(category, str):
                continue

            sink = sinks.get(category.lower())
            if sink is None:
                continue

            # ensure_ascii=False keeps non-ASCII text readable in the output.
            sink.write(json.dumps(item, ensure_ascii=False) + "\n")
            written[category.lower()] += 1

    return written


def main():
    """Run the filter with the module-level default paths."""
    split_by_category(input_path, business_path, engineering_path)
    print("筛选完成，生成 business.jsonl 和 engineering.jsonl 文件。")


if __name__ == "__main__":
    main()